#!/usr/bin/env python
# -*- coding:utf-8 -*-

import os
import re
import get_html


def get_all_url(page_num, base_url):
    """Download pages 1..page_num and save each one under ./html/.

    The URL for page ``i`` is ``base_url`` with ``i`` appended. The body is
    fetched through ``get_html.get_html`` and written to ``./html/<i>``.
    When the fetch returns a falsy value the page is reported as an error
    and skipped.

    Args:
        page_num: Number of the last page to fetch; pages are numbered
            from 1, so a value below 1 fetches nothing.
        base_url: URL prefix to which the page number is appended.
    """
    out_dir = './html'
    # Make sure the output directory exists so a write cannot fail merely
    # because the directory is missing.
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    for i in range(1, page_num + 1):
        url = base_url + str(i)
        print(url)
        html = get_html.get_html(url)
        if not html:
            print('write html error')
            continue
        # The context manager flushes and closes the handle even when the
        # write raises, so no file descriptor is leaked per page.
        with open(out_dir + '/' + str(i), 'w') as page_file:
            page_file.write(html)
        print('write html succeed')


def get_items():
    """Collect item ids from every saved page into ./items.txt.

    Every file in ./html is scanned for ``value="CompareItem_<id>"``. Each
    captured ``<id>`` is written on its own line to ./items.txt, which is
    overwritten on every call. Duplicates are kept. The number of matches
    found in each file is printed.
    """
    # Compile once; the same pattern is applied to every page.
    item_pattern = re.compile(r'value="CompareItem_(.*?)"')

    # Closing the output file on exit guarantees the ids are flushed to
    # disk before any later step reads ./items.txt.
    with open('./items.txt', 'w') as result_file:
        for name in os.listdir('./html'):
            with open('./html/' + str(name), 'r') as page_file:
                html = page_file.read()
            items = item_pattern.findall(html)
            print(len(items))
            for item in items:
                result_file.write(item + '\n')


if __name__ == '__main__':
    # Newegg product-list query; the page number is appended after "Page=".
    listing_url = (
        'http://www.newegg.com/Product/ProductList.aspx'
        '?Submit=ENE'
        '&N=100008241%204814%204017%204018%204019%204020%204023%204026%204027%204084%204092'
        '&IsNodeId=1&bop=And&PageSize=60&order=BESTMATCH&Page='
    )
    total_pages = 30

    # Save the listing pages, then pull the item ids out of them.
    get_all_url(page_num=total_pages, base_url=listing_url)
    get_items()

    # Sort and de-duplicate the collected ids with the shell's sort/uniq.
    os.system('sort ./items.txt|uniq > ./items_last.txt')
